import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import trim_mean
from scipy.stats import kurtosis, skew
# MANUAL INPUTS
# List the CSV files (located in ~/Desktop/Trading/Data) for each portfolio variant:
portfolio_with_candidate = ["SPY_history.csv", "XLU_history.csv", "XLF_history.csv"]
portfolio_without_candidate= ["XLF_history.csv","SPY_history.csv"]
candidate = ["XLU_history.csv"]
# Define weights_with_candidate (make sure the keys match the column names):
weights_with_candidate = {
    'Return_SPY': 0.3, # 30% weight to SPY
    'Return_XLU': 0.5, # 50% weight to XLU
    'Return_XLF': 0.2 # 20% weight to XLF
}
weights_without_candidate = {
    "Return_SPY": 0.6, # Adjust weights to exclude candidate
    "Return_XLF": 0.4
}
# Number of rows kept after sorting/truncation below.
observations_to_keep= 1500
def process_portfolio_data(securities, weights_with_candidate, weights_without_candidate):
    """Load security CSVs, compute per-security returns, and add portfolio return columns.

    Args:
        securities: CSV filenames like "SPY_history.csv", read from
            ~/Desktop/Trading/Data.
        weights_with_candidate: Maps return-column name -> weight for the
            portfolio that includes the candidate security.
        weights_without_candidate: Maps return-column name -> weight for the
            portfolio that excludes the candidate security.

    Returns:
        pd.DataFrame: Every security's columns side by side, plus
        'portfolio_uniform_daily_return', 'portfolio_weighted_daily_return',
        'portfolio_weighted_daily_return_without_candidate', and
        'Original_Index'.
    """
    data_folder = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
    frames = []
    for filename in securities:
        frame = pd.read_csv(os.path.join(data_folder, filename))
        frame['Date'] = pd.to_datetime(frame['Date'])
        # "SPY_history.csv" -> "SPY"
        ticker = filename.split('_')[0]
        # Suffix every column so securities stay distinguishable after concat.
        frame = frame.add_suffix(f'_{ticker}')
        # Day-over-day percentage change of the close price is the return.
        frame[f'Return_{ticker}'] = frame[f'Close/Last_{ticker}'].pct_change()
        frames.append(frame)
    # Stack all securities column-wise (aligned on the row index).
    combined_data = pd.concat(frames, axis=1)
    # Equal-weighted portfolio return: row-wise mean over every Return_* column.
    return_cols = [c for c in combined_data.columns if c.startswith("Return_")]
    combined_data["portfolio_uniform_daily_return"] = combined_data[return_cols].mean(axis=1)

    def _weighted_return(weights):
        # Normalize the weights to sum to 1, then take the weighted sum of
        # whichever return columns are actually present in the data.
        total = sum(weights.values())
        return sum(
            combined_data[col] * (weight / total)
            for col, weight in weights.items()
            if col in combined_data.columns
        )

    combined_data["portfolio_weighted_daily_return"] = _weighted_return(weights_with_candidate)
    combined_data["portfolio_weighted_daily_return_without_candidate"] = _weighted_return(weights_without_candidate)
    # Remember each row's original position (used for sorting later).
    combined_data['Original_Index'] = combined_data.index
    return combined_data
# Process the data for different portfolios by passing both weight dictionaries.
portfolio_with_candidate_df = process_portfolio_data(portfolio_with_candidate, weights_with_candidate, weights_without_candidate)
portfolio_without_candidate_df = process_portfolio_data(portfolio_without_candidate, weights_with_candidate, weights_without_candidate)
candidate_df = process_portfolio_data(candidate, weights_with_candidate, weights_without_candidate)
# NOTE(review): bare attribute access below is leftover notebook inspection —
# it has no effect when this file runs as a script.
portfolio_with_candidate_df.columns
# --- Pasted notebook output (commented out so the file parses as Python) ---
# Index(['Date_SPY', 'Close/Last_SPY', 'Volume_SPY', 'Open_SPY', 'High_SPY',
#        'Low_SPY', 'Return_SPY', 'Date_XLU', 'Close/Last_XLU', 'Volume_XLU',
#        'Open_XLU', 'High_XLU', 'Low_XLU', 'Return_XLU', 'Date_XLF',
#        'Close/Last_XLF', 'Volume_XLF', 'Open_XLF', 'High_XLF', 'Low_XLF',
#        'Return_XLF', 'portfolio_uniform_daily_return',
#        'portfolio_weighted_daily_return',
#        'portfolio_weighted_daily_return_without_candidate', 'Original_Index'],
#       dtype='object')
#
# portfolio_with_candidate_df.head(1) showed (1 row x 25 columns):
# | Date_SPY   | Close/Last_SPY | Volume_SPY | Open_SPY | ... | Return_XLF | portfolio_uniform_daily_return | ... | Original_Index |
# | 2025-01-24 | 607.97         | 34604690   | 609.81   | ... | NaN        | NaN                            | ... | 0              |
# Helper to sort a DataFrame by its Original_Index column.
def sort_by_original_index(data):
    """Return *data* ordered by its 'Original_Index' column, largest value first."""
    return data.sort_values('Original_Index', ascending=False)
# Sort the DataFrames by Original_Index (descending): the truncation step below
# then keeps the rows with the highest original positions — presumably the
# oldest dates, since the pasted sample shows the CSVs newest-first; TODO confirm.
portfolio_with_candidate_df = sort_by_original_index(portfolio_with_candidate_df)
portfolio_without_candidate_df = sort_by_original_index(portfolio_without_candidate_df)
candidate_df = sort_by_original_index(candidate_df)
# Truncate each DataFrame to a fixed number of rows.
def drop_excess_observations(dataframe, observations_to_keep):
    """
    Truncate *dataframe* to its first *observations_to_keep* rows.

    Args:
        dataframe (pd.DataFrame): Input DataFrame to truncate.
        observations_to_keep (int): Number of rows to retain.
    Returns:
        pd.DataFrame: The leading rows, re-indexed from 0.
    """
    truncated = dataframe.head(observations_to_keep)
    return truncated.reset_index(drop=True)
# Apply the truncation to each DataFrame.
portfolio_with_candidate_df = drop_excess_observations(portfolio_with_candidate_df, observations_to_keep)
portfolio_without_candidate_df = drop_excess_observations(portfolio_without_candidate_df, observations_to_keep)
candidate_df = drop_excess_observations(candidate_df, observations_to_keep)
# Automatically detect the per-asset return columns for the current portfolio.
# The portfolio-level columns ("portfolio_uniform_daily_return", etc.) do not
# start with "Return_", so the startswith() filter alone is sufficient — the
# previous extra `!= "portfolio_uniform_daily_return"` clause was redundant.
asset_returns_columns = [col for col in portfolio_with_candidate_df.columns if col.startswith("Return_")]
def generate_efficient_frontier_dirichlet(returns_df, asset_columns, num_samples=100, seed=None):
    """
    Sample random long-only portfolios whose weights always sum to 1 and
    record each portfolio's mean return and volatility (an empirical
    efficient-frontier cloud).

    Parameters:
        returns_df (pd.DataFrame): DataFrame with asset return columns.
        asset_columns (list): Asset return column names; each is expected to
            look like "Return_<TICKER>" (the ticker becomes the weight label).
        num_samples (int): Number of random portfolios to sample.
        seed (int | None): Optional seed for reproducible sampling
            (backward-compatible addition; default keeps unseeded behavior).

    Returns:
        pd.DataFrame: One row per portfolio with its weights (w_<TICKER>,
        rounded to 3 decimals), mean return, and volatility.
    """
    rng = np.random.default_rng(seed)
    n_assets = len(asset_columns)
    results = []
    for _ in range(num_samples):
        # Dirichlet draws are non-negative and sum to 1 by construction.
        weights = rng.dirichlet(np.ones(n_assets))
        # Weighted sum of the individual return series.
        port_return_series = sum(returns_df[col] * weight for col, weight in zip(asset_columns, weights))
        # Store the weights (labelled by ticker) and the portfolio metrics.
        record = {f"w_{col.split('_')[1]}": round(weight, 3) for col, weight in zip(asset_columns, weights)}
        record.update({
            "mean_return": port_return_series.mean(),
            "volatility": port_return_series.std(),
        })
        results.append(record)
    return pd.DataFrame(results)
# Sample 1,000 random portfolios from the with-candidate asset universe.
frontier_df = generate_efficient_frontier_dirichlet(portfolio_with_candidate_df, asset_returns_columns, num_samples=1000)
print(frontier_df.head(2))
# Pasted sample output:
#    w_SPY  w_XLU  w_XLF  mean_return  volatility
# 0  0.718  0.053  0.228    -0.000293    0.012115
# 1  0.306  0.431  0.262    -0.000187    0.011633
# TODO(review): consider coloring by kurtosis or skew instead — a Sharpe-style
# ratio is arguably redundant when the efficient frontier itself is plotted.
weight_columns = [col for col in frontier_df.columns if col.startswith("w_")]
# Create an interactive scatter plot with Plotly Express.
fig = px.scatter(
    frontier_df,
    x="volatility",
    y="mean_return",
    # Color encodes mean/volatility — a Sharpe-like ratio with a zero risk-free rate.
    color=frontier_df["mean_return"] / frontier_df["volatility"],
    color_continuous_scale="Viridis",
    hover_data=weight_columns + ["mean_return", "volatility"],  # shown on hover
    title="Efficient Frontier: Mean Return vs. Volatility"
)
# Label the axes.
fig.update_layout(
    xaxis_title="Portfolio Volatility (Standard Deviation)",
    yaxis_title="Portfolio Mean Return"
)
# Display the interactive plot.
fig.show()
# Add skewness and kurtosis for each sampled portfolio.
# NOTE(review): this rebuilds each portfolio's weighted return series per row,
# twice (O(num_samples x observations) each) and from the ROUNDED weights in
# frontier_df — fine at this scale, but the series could be computed once per
# row and reused for both statistics.
frontier_df['skewness'] = frontier_df.apply(lambda row: skew(
    sum(portfolio_with_candidate_df[col] * row[f"w_{col.split('_')[1]}"]
        for col in asset_returns_columns)
), axis=1)
frontier_df['kurtosis'] = frontier_df.apply(lambda row: kurtosis(
    sum(portfolio_with_candidate_df[col] * row[f"w_{col.split('_')[1]}"]
        for col in asset_returns_columns)
), axis=1)
# 3D scatter: volatility x mean return x skewness, colored by kurtosis.
fig = px.scatter_3d(
    frontier_df,
    x="volatility",
    y="mean_return",
    z="skewness",  # Skewness on the Z-axis
    color="kurtosis",  # Kurtosis as the color scale
    color_continuous_scale="Viridis",
    hover_data=weight_columns + ["mean_return", "volatility", "skewness", "kurtosis"],
    title="3D Efficient Frontier: Volatility, Mean Return, Skewness, and Kurtosis"
)
# Enlarge the chart and label the axes and colorbar.
fig.update_layout(
    width=1000,  # chart width in pixels
    height=800,  # chart height in pixels
    scene=dict(
        xaxis_title="Portfolio Volatility (Standard Deviation)",
        yaxis_title="Portfolio Mean Return",
        zaxis_title="Portfolio Skewness"
    ),
    coloraxis_colorbar=dict(title="Kurtosis")  # Label the colorbar
)
# Display the interactive 3D plot.
fig.show()
# USE THIS FUNCTION/DATAFRAMES LATER WHEN YOU WANT TO ESTABLISH THE STATIONARITY
# OF THE KURTOSIS, SKEW, MEAN AND STANDARD DEVIATION.
def calculate_cumulative_statistics(dataframe, portfolio_list, weights_dictionary):
    """
    Calculate weighted statistics (mean, standard deviation, skewness,
    kurtosis) cumulatively: row i uses all observations up to and including
    row i.

    Args:
        dataframe (pd.DataFrame): Data containing "Return_<TICKER>" columns.
        portfolio_list (list): CSV filenames, e.g. ["SPY_history.csv", ...];
            tickers are taken from the part before the first underscore.
        weights_dictionary (dict): Maps "Return_<TICKER>" -> weight. Weights
            are renormalized to sum to 1 over the listed securities (this
            excludes any implicit cash position).

    Returns:
        pd.DataFrame | None: One row per input row with the four cumulative
        weighted statistics, indexed like *dataframe*; None when the total
        weight over the listed securities is zero.
    """
    if not isinstance(portfolio_list, list):
        raise ValueError("Expected portfolio_list to be a list of filenames.")
    # Extract security names dynamically from the portfolio list.
    securities = [csv.split('_')[0] for csv in portfolio_list]
    return_keys = [f"Return_{security}" for security in securities]
    # Renormalize the weights over the listed securities.
    invested_weights = {key: weights_dictionary.get(key, 0) for key in return_keys}
    total_invested_weight = sum(invested_weights.values())
    if total_invested_weight > 0:
        normalized_weights = {key: weight / total_invested_weight for key, weight in invested_weights.items()}
    else:
        print("Warning: Total invested weight is zero. Cannot rescale weights.")
        return None
    # BUG FIX: the previous version wrote results with stats_df.loc[i, ...]
    # where i is a POSITION but .loc expects a LABEL — with any non-RangeIndex
    # input that silently appended/overwrote the wrong rows. Accumulate plain
    # records instead and build the frame once at the end.
    records = []
    for i in range(len(dataframe)):
        weighted_mean = 0.0
        weighted_std_dev = 0.0
        weighted_skewness = 0.0
        weighted_kurtosis = 0.0
        for return_key in return_keys:
            if return_key in dataframe.columns:
                # All observations up to and including the current row.
                cumulative_data = dataframe[return_key].iloc[:i + 1].dropna()
                if len(cumulative_data) > 1:  # need >1 point for meaningful stats
                    weight = normalized_weights.get(return_key, 0)
                    weighted_mean += cumulative_data.mean() * weight
                    weighted_std_dev += cumulative_data.std() * weight
                    weighted_skewness += skew(cumulative_data) * weight
                    weighted_kurtosis += kurtosis(cumulative_data) * weight
        records.append({
            "cumulative_weighted_mean": weighted_mean,
            "cumulative_weighted_std_dev": weighted_std_dev,
            "cumulative_weighted_skewness": weighted_skewness,
            "cumulative_weighted_kurtosis": weighted_kurtosis,
        })
    return pd.DataFrame(records, index=dataframe.index)
print("Portfolio with Candidate DataFrame")
print(calculate_cumulative_statistics(portfolio_with_candidate_df, portfolio_with_candidate, weights_with_candidate).iloc[-1])
print()  # Blank line
print("Portfolio without Candidate DataFrame")
# BUG FIX: the two calls below previously passed the WITH-candidate list and
# weights; since the candidate's Return column is absent from these frames,
# only part of the normalized weight was ever applied, silently understating
# every statistic. Pass each DataFrame its matching list/weights instead.
print(calculate_cumulative_statistics(portfolio_without_candidate_df, portfolio_without_candidate, weights_without_candidate).iloc[-1])
print()  # Blank line
print("Candidate DataFrame")
print(calculate_cumulative_statistics(candidate_df, candidate, weights_with_candidate).iloc[-1])
# Pasted sample output:
# Portfolio with Candidate DataFrame
#   cumulative_weighted_mean      -0.000178
#   cumulative_weighted_std_dev    0.013109
#   cumulative_weighted_skewness   1.016090
#   cumulative_weighted_kurtosis  19.852011   (Name: 1499)
# Portfolio without Candidate DataFrame
#   cumulative_weighted_mean      -0.000140
#   cumulative_weighted_std_dev    0.006623
#   cumulative_weighted_skewness   0.648067
#   cumulative_weighted_kurtosis   9.678355   (Name: 1499)
# Candidate DataFrame
#   cumulative_weighted_mean      -0.000038
#   cumulative_weighted_std_dev    0.006486
#   cumulative_weighted_skewness   0.368022
#   cumulative_weighted_kurtosis  10.173656   (Name: 1499)
def plot_weighted_return_distributions(returns_with_candidate, returns_without_candidate, stats):
    """
    Overlay histograms with KDE curves for the "With Candidate" and
    "Without Candidate" weighted-return series, annotating each with its
    summary statistics in the legend.

    Args:
        returns_with_candidate (pd.Series): Weighted returns including the candidate.
        returns_without_candidate (pd.Series): Weighted returns excluding the candidate.
        stats (dict): Per-label dict with 'Mean', 'Std Dev', 'Kurtosis', 'Skew'.
    """
    series_by_label = (
        ("With Candidate", returns_with_candidate.dropna()),
        ("Without Candidate", returns_without_candidate.dropna()),
    )
    plt.figure(figsize=(12, 6))
    palette = ("blue", "green")
    for (label, series), color in zip(series_by_label, palette):
        metrics = stats[label]
        legend_text = (
            f"{label}\n"
            f"Mean: {metrics['Mean']:.4f}\n"
            f"Std Dev: {metrics['Std Dev']:.4f}\n"
            f"Kurtosis: {metrics['Kurtosis']:.4f}\n"
            f"Skew: {metrics['Skew']:.4f}"
        )
        # Semi-transparent so both distributions stay visible where they overlap.
        sns.histplot(series, bins=50, kde=True, color=color, label=legend_text, alpha=0.4)
    plt.legend(fontsize=10)
    plt.title("Portfolio Weighted Return Distributions")
    plt.xlabel("Daily Weighted Return")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()
# Weighted daily return series for the two portfolio variants.
weighted_returns_data = {
    "With Candidate": portfolio_with_candidate_df["portfolio_weighted_daily_return"],
    "Without Candidate": portfolio_without_candidate_df["portfolio_weighted_daily_return_without_candidate"]
}
# Summary statistics shown in the plot legend.
# NOTE(review): np.std defaults to the population std (ddof=0), while other
# parts of this file use the sample std (ddof=1) — confirm which is intended.
stats = {
    label: {
        "Mean": np.mean(data.dropna()),
        "Std Dev": np.std(data.dropna()),
        "Kurtosis": kurtosis(data.dropna()),
        "Skew": skew(data.dropna())
    }
    for label, data in weighted_returns_data.items()
}
# Plot both distributions with their stats in the legend.
plot_weighted_return_distributions(
    returns_with_candidate=portfolio_with_candidate_df["portfolio_weighted_daily_return"],
    returns_without_candidate=portfolio_without_candidate_df["portfolio_weighted_daily_return_without_candidate"],
    stats=stats
)
def trimmed_std_dev(data, trim_percent=0.02):
    """
    Calculate the trimmed standard deviation of a portfolio's average daily return.

    Parameters:
    - data (pd.DataFrame): DataFrame containing "Return_*" columns for each
      security. NOTE: this (re)writes data["portfolio_uniform_daily_return"]
      in place, preserving the original function's side effect.
    - trim_percent (float): Fraction of observations to trim from EACH end
      (default 0.02, i.e. 2% per tail — the old docstring's "10%" was wrong).

    Returns:
    - float: Sample standard deviation (ddof=1) of the trimmed returns.

    Raises:
    - ValueError: If no "Return_" columns are present.
    """
    # Select only return columns (avoid including other numerical data).
    return_columns = [col for col in data.columns if col.startswith("Return_")]
    if not return_columns:
        raise ValueError("No return columns found in the dataset!")
    # Portfolio daily return = equal-weighted average of the securities' returns.
    data["portfolio_uniform_daily_return"] = data[return_columns].mean(axis=1)
    portfolio_returns = data["portfolio_uniform_daily_return"].dropna().values
    # Number of observations to drop from each tail.
    trim_count = int(len(portfolio_returns) * trim_percent)
    sorted_returns = np.sort(portfolio_returns)
    # BUG FIX: when trim_count == 0, sorted_returns[0:-0] is an EMPTY slice
    # (std of nothing -> nan); guard so zero trimming keeps every observation.
    if trim_count > 0:
        trimmed_returns = sorted_returns[trim_count:-trim_count]
    else:
        trimmed_returns = sorted_returns
    return np.std(trimmed_returns, ddof=1)
# Compute trimmed standard deviations (2% trimmed from each tail).
trimmed_std_portfolio_without_candidate = trimmed_std_dev(portfolio_without_candidate_df, trim_percent=0.02)
trimmed_std_portfolio_with_candidate = trimmed_std_dev(portfolio_with_candidate_df, trim_percent=0.02)
trimmed_std_candidate = trimmed_std_dev(candidate_df, trim_percent=0.02)
def gini_mean_difference(data, column):
    """
    Calculate the Gini mean difference (GMD) for a column of a DataFrame.

    GMD is the average absolute difference over all DISTINCT pairs of
    observations: sum over i != j of |x_i - x_j| / (n * (n - 1)).

    Args:
        data (pd.DataFrame): The DataFrame containing the data.
        column (str): The column name for which to calculate the GMD.

    Returns:
        float: The Gini mean difference; nan when fewer than 2 observations.
    """
    values = data[column].dropna().values
    n = len(values)
    if n < 2:
        return float('nan')  # a pairwise statistic needs at least two points
    # Pairwise absolute differences; the diagonal (i == j) is zero.
    diff_matrix = np.abs(np.subtract.outer(values, values))
    # BUG FIX: averaging over all n^2 cells (including the zero diagonal)
    # understated the statistic by a factor of (n - 1) / n; divide by the
    # number of ordered distinct pairs instead.
    return diff_matrix.sum() / (n * (n - 1))
# Gini mean difference of each portfolio's equal-weighted daily return.
# (For candidate_df this is just the candidate's own return series.)
gini_portfolio_with_candidate = gini_mean_difference(portfolio_with_candidate_df, 'portfolio_uniform_daily_return')
gini_portfolio_without_candidate = gini_mean_difference(portfolio_without_candidate_df, 'portfolio_uniform_daily_return')
gini_candidate = gini_mean_difference(candidate_df, 'portfolio_uniform_daily_return')
# TODO: Double-check the stationarity of the Gini mean coefficient and the trimmed standard deviation.
def plot_clustered_bar_chart_with_labels(gini_values, trimmed_std_values, labels):
    """
    Plot a clustered bar chart (with value labels) comparing the Gini mean
    difference and trimmed standard deviation across portfolios.

    Args:
        gini_values (list): Gini mean differences, one per portfolio.
        trimmed_std_values (list): Trimmed standard deviations, one per portfolio.
        labels (list): Portfolio names for the x-axis.
    """
    n_portfolios = len(labels)
    x = np.arange(n_portfolios)  # X-axis positions for the groups
    bar_width = 0.35  # Width of each bar
    plt.figure(figsize=(10, 6))
    gini_bars = plt.bar(x - bar_width / 2, gini_values, width=bar_width, label='Gini Mean Difference', color='skyblue')
    std_bars = plt.bar(x + bar_width / 2, trimmed_std_values, width=bar_width, label='Trimmed Std Dev', color='lightcoral')
    # Titles, axis labels, and per-group tick labels.
    plt.title('Comparison of Portfolio Statistics')
    plt.xlabel('Portfolios')
    plt.ylabel('Values')
    plt.xticks(x, labels)
    plt.legend()
    plt.grid(axis="y", linestyle="--", alpha=0.5)
    # Annotate every bar with its height (single loop replaces the previous
    # two identical copy-pasted loops).
    for bar in [*gini_bars, *std_bars]:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.4f}', ha='center', va='bottom', fontsize=10)
    plt.tight_layout()
    plt.show()
# Collect the statistics computed above, in the same order as `labels`.
gini_values = [gini_portfolio_with_candidate, gini_portfolio_without_candidate, gini_candidate]
trimmed_std_values = [trimmed_std_portfolio_with_candidate, trimmed_std_portfolio_without_candidate, trimmed_std_candidate]
labels = ["With Candidate", "Without Candidate", "Candidate Only"]
# Render the clustered bar chart.
plot_clustered_bar_chart_with_labels(gini_values, trimmed_std_values, labels)
# NOTES ON STATIONARITY (review discussion, kept as comments):
# Shape of the distribution (skewness, kurtosis, standard deviation, mean):
# returns are essentially first-differenced prices, so they are already
# detrended to some extent; this makes the assumption of mean-variance
# stationarity less critical.
# Even though financial returns can still exhibit non-stationary behavior
# (e.g., volatility clustering), their distributional properties (like skewness
# and kurtosis) are relatively stable over time when calculated over a large
# enough sample.
# Stationarity therefore does not need to be handled explicitly here unless the
# analysis spans vastly different market conditions (e.g., a bull market vs. a
# bear market).
# For metrics like beta and covariance, which depend on relationships between
# securities, stationarity is more crucial: if the underlying data is not
# stationary, these metrics can fluctuate unpredictably over time, making them
# unreliable.
# If the focus is on the stability of variance, covariance, and beta, the
# concept of mean stationarity is not particularly relevant. Don't use the
# Dickey-Fuller or ADF tests here.
# Select only the "Close/Last_" columns.
close_columns = [col for col in portfolio_with_candidate_df.columns if col.startswith('Close/Last_')]
# Daily percentage returns from the close prices.
# NOTE(review): this duplicates the per-security Return_* columns computed in
# process_portfolio_data, and the rows are in the sorted/truncated order —
# confirm that ordering is what pct_change should operate on.
returns = portfolio_with_candidate_df[close_columns].pct_change()
# Drop NaN values (from the first row caused by pct_change).
returns = returns.dropna()
# 1. Correlation Matrix
correlation_matrix = returns.corr()
# 2. Covariance Matrix
covariance_matrix = returns.cov()
# 3. Beta Matrix — same shape as the correlation/covariance matrices, NaN
# everywhere except the diagonal (beta of an asset vs itself = 1) and the
# benchmark column, which holds each asset's beta vs the benchmark.
benchmark = close_columns[0]
beta_matrix = pd.DataFrame(np.nan, index=close_columns, columns=close_columns)
# Single pass replaces the previous O(n^2) nested loop, which recomputed the
# same beta once per inner iteration and never used its `row` variable; the
# unused `betas = {}` dict was dropped as well. The resulting matrix is
# identical.
for col in close_columns:
    beta_matrix.loc[col, col] = 1
    # beta = Cov(asset, benchmark) / Var(benchmark); for the benchmark itself
    # this is Var/Var = 1, consistent with the diagonal.
    beta_matrix.loc[col, benchmark] = covariance_matrix.loc[col, benchmark] / covariance_matrix.loc[benchmark, benchmark]
# Bundle the matrices to plot side by side (Beta Matrix is plotted separately).
# BUG FIX: this previously referenced `covariance_matrix_mu`, a name that is
# never defined anywhere in the file (NameError at runtime); use the
# covariance matrix computed above.
matrices = {
    "Correlation Matrix": correlation_matrix,
    "Covariance Matrix (mu)": covariance_matrix
}
num_matrices = len(matrices)
# One row of side-by-side heatmap subplots, one per matrix.
fig = make_subplots(
    rows=1, cols=num_matrices,
    subplot_titles=list(matrices.keys()),
    column_widths=[1/num_matrices] * num_matrices,
    shared_yaxes=True,
    shared_xaxes=True
)
# X positions for the colorbars, spread evenly under the heatmaps.
x_positions = np.linspace(0.15, 0.85, num_matrices)
# Add each matrix as a heatmap.
for i, (title, matrix) in enumerate(matrices.items(), start=1):
    fig.add_trace(
        go.Heatmap(
            z=matrix.values,
            x=matrix.columns,
            y=matrix.columns,
            colorscale="RdBu",
            colorbar=dict(
                title=title.split()[0],  # e.g., "Correlation" or "Covariance"
                tickvals=[matrix.values.min(), 0, matrix.values.max()],
                yanchor="top",
                y=-0.25,  # place the colorbar below the plot area
                x=x_positions[i - 1],
                xanchor="center",
                orientation="v"
            ),
            text=matrix.values.round(2),  # annotate each cell with its value
            texttemplate="%{text}",
            showscale=True,
            hoverinfo="skip"
        ),
        row=1, col=i
    )
# Overall title and sizing.
fig.update_layout(
    title="Correlation and Covariance (mu) Matrices",
    height=750,
    showlegend=False,
    title_x=0.5
)
fig.show()
# Define your benchmark (assumed to be the first asset).
benchmark = close_columns[0]  # e.g., 'Close/Last_SPY'
# Extract the benchmark column of the beta matrix (double brackets keep it a DataFrame).
beta_column = beta_matrix[[benchmark]]
# Single-column heatmap: each asset's beta relative to the benchmark.
fig_beta = go.Figure(
    go.Heatmap(
        z=beta_column.values,
        x=beta_column.columns,
        y=beta_column.index,
        colorscale="RdBu",
        colorbar=dict(
            title=f"Beta vs {benchmark}",
            tickvals=[beta_column.values.min(), 0, beta_column.values.max()]
        ),
        text=beta_column.values.round(2),  # annotate cells with beta values
        texttemplate="%{text}",
        hoverinfo="skip"
    )
)
# Titles, axis labels, and sizing for the beta heatmap.
fig_beta.update_layout(
    title=f"Beta for Each Asset Relative to {benchmark}",
    xaxis_title=f"Benchmark: {benchmark}",
    yaxis_title="Assets",
    width=400,  # Adjust width as needed
    height=600  # Adjust height as needed
)
fig_beta.show()